groups:
  - name: Host-down
    rules:
      # Pages once a target's `up` series has been equal to 0 for 5 minutes.
      - alert: InstanceDown
        expr: up == 0
        for: 5m
        annotations:
          summary: 'Instance {{ $labels.instance }} down'
          description: >-
            {{ $labels.instance }} of job {{ $labels.job }} has been down
            for more than 5 minutes.
        labels:
          severity: page
 
  - name: Disk-usage
    rules:
    # Used space (size - free) as a percentage of size, rounded up by ceil(),
    # for every filesystem whose mountpoint is not /boot; alerts above 95.
    # This rule has no `for:` clause.
    - alert: Low data disk space
      expr: >-
        ceil(((node_filesystem_size_bytes{mountpoint!="/boot"} -
        node_filesystem_free_bytes{mountpoint!="/boot"}) /
        node_filesystem_size_bytes{mountpoint!="/boot"} * 100)) > 95
      annotations:
        host: '{{$labels.instance}}'
        title: Disk Usage
        summary: 'Disk usage is {{humanize $value}}%'
        description: "Partition : {{$labels.mountpoint}}"
      labels:
        severity: critical

  - name: Memory-usage
    rules:
    # Used memory is computed as MemTotal - MemFree - Buffers - Cached, taken
    # as a percentage of MemTotal and rounded up by ceil(); alerts above 80.
    # This rule has no `for:` clause.
    - alert: High memory usage
      expr: >-
        ceil((((node_memory_MemTotal_bytes - node_memory_MemFree_bytes -
        node_memory_Buffers_bytes - node_memory_Cached_bytes) /
        node_memory_MemTotal_bytes) * 100)) > 80
      annotations:
        host: '{{$labels.instance}}'
        title: Memory Usage
        summary: 'Memory usage is {{humanize $value}}%'
        description: "Memory usage threshold set to 80%."
      labels:
        severity: critical

  # NOTE(review): "Hight" in the group name looks like a typo for "High"; the
  # name is kept as-is in case external tooling keys on it.
  - name: CPU-Hight-Load
    rules:
    # Alerts when `systemload_average` is above 90.
    # NOTE(review): `for: 5s` is likely shorter than the rule evaluation
    # interval, so it adds little debounce; confirm the intended duration.
    - alert: HighSystemLoad
      expr: systemload_average > 90
      for: 5s
      labels:
        severity: "critical"
      annotations:
        # Title names this alert's own subject (system load), not memory.
        title: "System Load"
        summary: "High system load: {{ $value | printf \"%.2f\" }}%"
        host: "{{$labels.instance}}"